#include "stdafx.h"
#include "DataReader.h"
#include "math.h"

// SLOVNIK (dictionary trie)

// Builds a trie node that represents the prefix `word` and owns `many`
// child slots, all initially empty (NULL). Both counters start at zero.
CSlovnik::CSlovnik(CString word, int many)
{
	this->word = word;
	num_prefixes = 0;
	num_words = 0;

	// A non-positive `many` leaves the node without any child slots.
	if (many > 0)
		next.resize(many, static_cast<CSlovnik*>(NULL));
}

// Recursively destroys the whole subtree rooted at this node.
CSlovnik::~CSlovnik()
{
	for (size_t slot = 0; slot < next.size(); ++slot)
		delete next[slot];		// deleting a NULL slot is a no-op
	next.clear();
}

// PERMUTACIA (permutation)

// Records the permutation size only. The `perm` vector stays empty until
// Generate() is called or the caller fills it in.
CPerm::CPerm(int count)
	: count(count)
{
}

// Copy-like constructor: duplicates the first `count` entries of `permut`.
CPerm::CPerm(CPerm* permut)
{
	count = permut->count;
	if (count > 0)
		perm.insert(perm.end(), permut->perm.begin(), permut->perm.begin() + count);
}	

// Nothing to release by hand: the `perm` vector frees its own storage
// when the member is destroyed.
CPerm::~CPerm()
{
}
	
void CPerm::Generate()
{
	perm.clear();
	for (int i=0; i<count; i++) {
		perm.push_back(i);
	}
	for (int i=0; i<count; i++) {
		int ran = rand()%(count-i) + i;
		int temp = perm[i];
		perm[i] = perm[ran];
		perm[ran] = temp;
		//TRACE("%c->%c ", i+'A', perm[i]+'A');
	}
}

// Exchanges the entries at positions `a` and `b` (no bounds checking).
void CPerm::swap(int a, int b)
{
	const int held = perm[a];
	const int other = perm[b];
	perm[a] = other;
	perm[b] = held;
}

// Applies the permutation to every character of `str`.
//   str      - text to transform (taken by value; the transformed copy is returned)
//   active_l - the alphabet; a character's position in it is its index in `perm`
// Characters that are not part of `active_l`, or whose mapping falls outside
// the alphabet, are left unchanged. Previously Find() returning -1 was used
// directly as a vector index.
CString CPerm::GetString(CString str, CString active_l)
{
	const int length = str.GetLength();
	for (int i = 0; i < length; i++) {
		const int from = active_l.Find(str[i]);
		if (from < 0 || from >= (int)perm.size()) continue;

		const int to = perm[from];
		if (to < 0 || to >= active_l.GetLength()) continue;

		str.SetAt(i, active_l[to]);
	}
	return str;
}

inline int CPerm::GetChar(int ch, CString active_l)
{
	return active_l[perm[active_l.Find(ch)]];
}

// Returns the alphabet index that `ch` maps to under this permutation,
// or -1 when `ch` is not part of `active_l` (callers in this file assert
// that the result is >= 0). Previously an unknown character indexed perm[-1].
inline int CPerm::GetIndex(int ch, CString active_l)
{
	const int from = active_l.Find(ch);
	if (from < 0 || from >= (int)perm.size()) return -1;
	return perm[from];
}

// Returns the permuted alphabet: the image of `active_l` itself under
// this permutation.
CString CPerm::GetPerm(CString active_l)
{
	CString image = GetString(active_l, active_l);
	return image;
}

// True when the permutation is non-empty and maps every index to itself.
bool CPerm::IsIdentity()
{
	if (count <= 0) return false;

	for (int pos = 0; pos < count; ++pos) {
		if (perm[pos] != pos) return false;
	}
	return true;
}

// Returns true when `second` has the same size and the same mapping.
// A NULL `second` or a size mismatch yields false before any element is
// read; the old loop touched perm[0] of both operands even when the sizes
// differed.
bool CPerm::Equals(CPerm* second)
{
	if (second == NULL || count != second->count) return false;

	for (int i = 0; i < count; i++) {
		if (perm[i] != second->perm[i]) return false;
	}
	return true;
}

// Counts the positions at which applying this permutation to `code`
// changes the character.
//   code      - text to check
//   active_l  - alphabet used for the mapping
//   misplaced - optional out-parameter; it is reset and then receives each
//               distinct mapped character that differs from the original
// Returns the number of differing positions.
int	CPerm::WrongChars(CString code, CString active_l, CString* misplaced)
{
	const CString mapped = GetString(code, active_l);
	ASSERT(mapped.GetLength() == code.GetLength());

	if (misplaced != NULL) *misplaced = _T("");

	int mismatches = 0;
	const int length = code.GetLength();
	for (int pos = 0; pos < length; ++pos) {
		if (code[pos] == mapped[pos]) continue;

		++mismatches;
		if (misplaced != NULL && misplaced->Find(mapped[pos]) < 0)
			misplaced->AppendChar(mapped[pos]);
	}

	return mismatches;
}


// DATA (n-gram statistics, dictionary and solver)

// Creates an empty data set.
//   status - optional edit control used for progress messages (may be NULL)
// Every counter is zeroed here, including the ones that other methods only
// assign later (num_letters, ngrams, gr_length, gr_empty, num_perms), so
// that nothing is read uninitialized when methods are called out of order.
CData::CData(CEdit *status)
{
	this->status = status;

	// -1 marks a character as "not a letter and not an alias".
	for (int i=0; i<MAX_LETTERS; i++) {
		letters[i] = -1;
	}
	grams	= NULL;
	slovnik = NULL;

	num_letters = 0;
	ngrams = 0;
	gr_length = 0;
	gr_empty = 0;
	gr_all = 0;
	num_words = 0;
	diff_words = 0;
	num_perms = 0;

	// -1e10 is the "empty slot" score used throughout the result table.
	for (int i=0; i<NUM_RESULTS; i++) {
		results[i]	= -1e10;
		perm_res[i] = NULL;
		word_res[i] = 0;
	}
	
	srand((unsigned int)time(NULL));
}

// Frees every stored permutation and resets the result table to its
// empty state (score -1e10, no permutation, zero words).
void CData::ClearResults()
{
	for (int slot = 0; slot < NUM_RESULTS; ++slot) {
		delete perm_res[slot];		// safe for NULL entries
		perm_res[slot] = NULL;
		word_res[slot] = 0;
		results[slot] = -1e10;
	}
}

// Releases the n-gram table, the dictionary trie and all stored results.
CData::~CData()
{
	// `grams` is allocated with new int[...], so it needs the array form
	// of delete; the scalar form is undefined behaviour.
	delete[] grams;
	delete slovnik;		// the CSlovnik destructor frees the whole trie

	for (int i=0; i<NUM_RESULTS; i++) {
		delete perm_res[i];
	}
}

// Feeds every regular file matched by the search pattern `dir` to
// LoadFromFile(). Returns the number of files visited (whether or not each
// load succeeded), or -1 when no file matched.
int CData::LoadFromDir(CString dir, int ngrams)
{
	CFileFind finder;
	int visited = 0;

	for (BOOL more = finder.FindFile(dir); more; ) {
		more = finder.FindNextFile();
		if (!finder.IsDots() && !finder.IsDirectory()) {
			++visited;
			LoadFromFile(finder.GetFilePath(), ngrams);
		}
	}
	return (visited > 0 ? visited : -1);
}

// Reads a UTF-8 text file and accumulates its statistics:
//   * every run of `ngrams` consecutive letters is counted in `grams`
//     (spaces and ignored characters do not break an n-gram),
//   * every whitespace-delimited word is inserted into the dictionary trie.
// Parameters:
//   file   - path of the file to read
//   ngrams - n-gram length; only honoured on the first successful load,
//            later loads reuse the length the table was created with
// Returns the total number of n-grams counted so far, or -1 on failure
// (no alphabet set up, invalid n-gram length, file missing or empty).
int CData::LoadFromFile(CString file, int ngrams)
{
	CString		str;
	CStdioFileEx	input;
	int			temp[6];		// sliding window holding the last `ngrams` letters
	const int	max_ngrams = (int)(sizeof(temp) / sizeof(temp[0]));

	// The trie root is created by SetupLetters(); without it there is no
	// alphabet and nothing to attach words to.
	if (!slovnik) {
		ASSERT(FALSE);
		return -1;
	}
	// A new table needs a length that fits the window; 0 or a negative value
	// would also lead to a modulo by zero / runaway loop below.
	if (!grams && (ngrams < 1 || ngrams > max_ngrams)) {
		ASSERT(FALSE);
		return -1;
	}

	if (!input.Open(file, CFile::modeRead | CFile::typeText | CFile::shareDenyWrite)) {
		str.Format(_T("Sbor \"%s\" neexistuje!"), (LPCTSTR)file);
		if (status) status->SetWindowText(str);
		return -1;
	}
	input.SetCodePage(CP_UTF8);

	if (!input.ReadString(str)) {
		str.Format(_T("Sbor \"%s\" je przdny!"), (LPCTSTR)file);
		if (status) status->SetWindowText(str);
		input.Close();
		return -1;
	} 
	str.MakeUpper();
	// Terminate the line with a space, exactly as is done for every later
	// line, so the last word of the first line is not glued to the next line.
	str += _T(" ");
	
	// create the n-gram table if it does not exist yet
	if (!grams) {
		gr_length = 1;
		int n = ngrams;
		while (n--) gr_length *= num_letters;
		grams = new int[gr_length];
		for (int i=0; i<gr_length; i++) *(grams + i) = 0;
		gr_empty = gr_length;
		this->ngrams = ngrams;
	} else {
		ngrams = this->ngrams;
	}
	
	CSlovnik*	word = slovnik;		// current position in the trie
	int			temp_p = 0;			// number of letters seen so far
	int			str_p = 0;			// read position inside the current line
	int			ch = -1;

	while (true) {
		// skip everything that is neither a letter nor whitespace
		while (str_p < str.GetLength() && (ch = Char(str[str_p])) == -1 && !IsSpace(str[str_p])) str_p++;

		if (str_p >= str.GetLength()) {
			// line exhausted: fetch the next one or stop at end of file
			if (!input.ReadString(str)) break;
			str.MakeUpper();
			str_p = 0;
			str += _T(" ");
			continue;
		}

		if (IsSpace(str[str_p])) {
			// word boundary: close the word we were walking, if any
			if (word != slovnik) {
				if (word->num_words == 0) diff_words++;
				word->num_words++;
				num_words++;
				word = slovnik;
			}
		} else {
			// `ch` is the current letter: descend, creating the child on demand
			int i = active_l.Find(ch);
			ASSERT(i >= 0);
			word->num_prefixes++;
			if (!word->next[i]) {
				word->next[i] = new CSlovnik(word->word + (wchar_t)ch, active_l.GetLength());
			}
			word = word->next[i];
		}

		if (ch != -1) {				// a letter we care about: feed the n-gram window
			temp[temp_p % ngrams] = ch;
			if (temp_p+1 >= ngrams) {
				AddGram(temp, (temp_p+1) % ngrams);
			}
			temp_p++;
		}
		str_p++;
	}
	
	str.Format(_T("Sbor \"%s\" natan!"), (LPCTSTR)file);
	if (status) status->SetWindowText(str);
	input.Close();
	return gr_all;
}

// Resolves `ch` to an alphabet letter by following the alias table.
// In `letters`, 0 marks a real letter, -1 marks an unknown character and
// any other value is the character this one is an alias of.
// Returns the resolved letter, or -1 if `ch` does not lead to one.
int	CData::Char(int ch)
{
	for (;;) {
		if (ch < 0 || ch >= MAX_LETTERS) return -1;

		const int target = letters[ch];
		if (target == 0) return ch;
		if (target == -1) return -1;
		ch = target;		// follow the alias
	}
}

// Returns true when `ch` is a space or an alias that resolves to a space.
bool CData::IsSpace(int ch)
{
	for (;;) {
		if (ch < 0 || ch >= MAX_LETTERS) return false;
		if (ch == ' ') return true;

		const int target = letters[ch];
		if (target == 0 || target == -1) return false;
		ch = target;		// follow the alias
	}
}

// Counts one occurrence of the n-gram stored in the circular buffer `temp`
// whose oldest letter sits at index `first`.
void CData::AddGram(int* temp, int first)
{
	int& counter = grams[IndexByArray(temp, first)];
	if (counter == 0) gr_empty--;		// this n-gram is seen for the first time
	counter++;
	gr_all++;
}

// Installs `alphabet` as the active alphabet, creates the trie root on
// first use, marks every alphabet character as a letter and (re)registers
// the built-in aliases.
void CData::SetupLetters(CString alphabet)
{
	num_letters = alphabet.GetLength();
	active_l = alphabet;
	if (!slovnik) {
		slovnik = new CSlovnik(_T(""), active_l.GetLength());
	}

	for (int i=0; i<num_letters; i++) {
		const int ch = alphabet[i];
		// `letters` only has MAX_LETTERS entries; a character outside that
		// range must not be used as an index.
		ASSERT(ch >= 0 && ch < MAX_LETTERS);
		if (ch >= 0 && ch < MAX_LETTERS) letters[ch] = 0;
	}
	MakeAliases(ALIASES);
}

// Parses an alias table and registers each alias with MakeAlias().
// Each record has the form "<alias characters> <original>" and records are
// separated by '\n'. The text is upper-cased before parsing.
void CData::MakeAliases(CString text)
{
	text.MakeUpper();

	const int length = text.GetLength();
	int start = 0;

	while (start >= 0 && start < length) {
		const int pos = text.Find(' ', start);
		// stop when there is no separator or no character after it
		if (pos == -1 || pos + 1 >= length) break;

		MakeAlias(text.Mid(start, pos - start), text[pos + 1]);

		// The next record begins AFTER the line break; starting on the
		// '\n' itself would register the newline as an alias character.
		const int eol = text.Find('\n', pos + 2);
		start = (eol == -1 ? -1 : eol + 1);
	}
	
	// unusual character: 160 is treated as a space
	letters[160] = ' ';
}

// Registers every character of `aliases` as an alias of `original`.
// Characters that do not fit into the `letters` table are skipped instead
// of being written out of bounds.
void CData::MakeAlias(CString aliases, int original)
{
	for (int i=0; i<aliases.GetLength(); i++) {
		const int ch = aliases[i];
		ASSERT(ch != original);		// a character must not alias itself
		if (ch < 0 || ch >= MAX_LETTERS) continue;
		letters[ch] = original;
	}
}

// Computes the index into `grams` for the n-gram held in the circular
// buffer `temp`, reading `ngrams` letters starting at position `first`.
// The letters' alphabet positions are combined as digits of a base-
// `num_letters` number.
int CData::IndexByArray(int* temp, int first)
{
	int index = 0;
	for (int k = 0; k < ngrams; ++k) {
		const int letter = temp[(first + k) % ngrams];
		index = index * num_letters + active_l.Find(letter);
	}
	return index;
}

// Computes the index into `grams` for the n-gram formed by the first
// `ngrams` characters at `temp`, combining their alphabet positions as
// digits of a base-`num_letters` number.
int CData::IndexByArray(LPTSTR temp)
{
	int index = 0;
	for (int k = 0; k < ngrams; ++k) {
		index = index * num_letters + active_l.Find(temp[k]);
	}

	return index;
}

// Computes the index into `grams` of the n-gram obtained by applying `perm`
// to the first `ngrams` characters at `temp`.
// Each character is located through the active alphabet (as IndexByArray
// does) instead of assuming the alphabet starts at 'A', so alphabets other
// than A..Z are indexed correctly.
int CData::IndexByPerm(LPTSTR temp, CPerm* perm)
{
	int p = 0;
	for (int i=0; i<ngrams; i++) {
		p *= num_letters;
		p += perm->perm[active_l.Find(temp[i])];
	}

	return p;
}


// Scores how language-like `str` is.
//   function - 0 selects CountProb2 (sum of log probabilities); any other
//              value selects CountProb1 (mean probability)
// Returns -1e10 when `str` is shorter than one n-gram.
double CData::Language(CString str, int function)
{
	if (str.GetLength() < ngrams) return -1e10;

	if (function == 0) return CountProb2(str);
	return CountProb1(str);
}

// Scores `str` as seen through the permutation `perm`.
//   wrds  - optional out-parameter; receives the number of dictionary
//           prefixes (of length 6) found in the permuted text
//   value - weight of the dictionary bonus added to the score
// The base score is the mean log n-gram probability scaled by 1000; when
// `wrds` is given, value * words * 6 / length is added on top.
// Returns -1e10 when `str` is shorter than one n-gram.
double CData::Language(CString str, CPerm* perm, int *wrds, int value)
{
	const int prefix = 6;
	if (str.GetLength() < ngrams) return -1e10;

	LPTSTR text = str.GetBuffer();
	if (wrds != NULL) *wrds = 0;

	const int windows = str.GetLength() - ngrams + 1;
	double score = 0.0;
	int cooldown = 0;		// positions to skip before searching for a word again

	for (int pos = 0; pos < windows; ++pos) {
		score += log(GramProbability(IndexByPerm(text + pos, perm)));

		int hit = 0;
		if (wrds != NULL && cooldown <= 0)
			hit = FindWords(slovnik, &str, pos, prefix, perm);
		if (hit) {
			(*wrds) += hit;
			cooldown = hit * prefix;
		}
		cooldown--;
	}

	// normalise per n-gram and scale
	score = score * 1000 / (double)(str.GetLength()-ngrams+1);

	if (wrds != NULL) {
		// dictionary bonus
		const double bonus = value * (*wrds) * prefix / (double)str.GetLength();
		score += bonus;
	}
	return score;
}

// Relative frequency of the n-gram at `index`, plus a small constant share
// (0.1 / total) so that unseen n-grams never yield a probability of zero.
inline double CData::GramProbability(int index)
{
	const double total = (double)(gr_all);
	return (grams[index]/total + 0.1/total);
}

// Returns the arithmetic mean of the n-gram probabilities over every
// n-gram window of `str`.
double CData::CountProb1(CString str)					
{
	LPTSTR text = str.GetBuffer();
	const int windows = str.GetLength() - ngrams + 1;

	double sum = 0.0;
	for (int start = 0; start < windows; ++start)
		sum += GramProbability(IndexByArray(text + start));

	return sum / (double) windows;
}

// Returns the sum of the log n-gram probabilities over every n-gram window
// of `str` (not normalised by length).
double CData::CountProb2(CString str)					
{
	LPTSTR text = str.GetBuffer();
	const int windows = str.GetLength() - ngrams + 1;

	double log_sum = 0.0;
	for (int start = 0; start < windows; ++start)
		log_sum += log(GramProbability(IndexByArray(text + start)));

	return log_sum;
}

// Tries to break the substitution cipher `str` by random-restart hill
// climbing over letter permutations.
//   str     - cipher text; characters outside the alphabet are stripped first
//   many    - number of random restarts (values <= 0 perform none)
//   slovnik - when non-zero, dictionary words contribute to the score and
//             the value is passed on as their weight
// Every improvement found is offered to the result table. Returns the text
// decoded with the best stored permutation, or the cleaned input unchanged
// when no permutation has been stored (no restarts, or text shorter than
// one n-gram).
CString CData::Solve(CString str, int many, int slovnik)
{
	RemoveBadChars(str);

	num_perms = 0;
	code = str;

	CPerm	perm(num_letters);
	int		words = 0;
	int*	wrds = (slovnik ? &words : NULL);
	double	res, oldres;

	while (many-- > 0) {
		perm.Generate();

		oldres = Language(str, &perm, wrds, slovnik);
		AddToResults(oldres, &perm, words);
		num_perms++;

		// Climb: keep trying all pairwise swaps until a full pass brings
		// no improvement.
		bool top = false;
		while (!top) {
			top = true;
			for (int j=0; j<num_letters; j++) {
				for (int k=j+1; k<num_letters; k++) {
					perm.swap(j, k);
					num_perms++;
					res = Language(str, &perm, wrds, slovnik);
					if (res <= oldres) {
						// undo a worsening swap; a swap that ties is kept
						if (res != oldres)
							perm.swap(j, k);
					} else {
						top = false;
						AddToResults(res, &perm, words);
						oldres = res;
					}
				}
			}
		}
	}

	// Nothing may have qualified for the table; do not dereference NULL.
	if (!perm_res[0]) return str;

	return perm_res[0]->GetString(str, active_l);
}

// Normalises `str` in place: upper-cases it, drops every character that
// does not resolve to an alphabet letter and replaces aliases with the
// letter they stand for.
// Done in a single pass; the previous version called Remove()/Replace() on
// the whole string for every character, which is quadratic in its length.
void CData::RemoveBadChars(CString &str)
{
	str.MakeUpper();

	CString cleaned;
	const int length = str.GetLength();
	for (int i=0; i<length; i++) {
		const int ch = Char(str[i]);
		if (ch != -1) cleaned.AppendChar((TCHAR)ch);
	}
	str = cleaned;
}

// Inserts a scored permutation into the result table, which is kept sorted
// by descending score.
//   res   - score of the candidate
//   perm  - the candidate permutation
//   wrds  - word count stored alongside the score
//   erase - true when this function takes ownership of `perm` (it is then
//           either stored as-is or deleted); false when `perm` stays with
//           the caller and a copy is stored instead
// A candidate that does not beat the last entry, or that duplicates an
// entry with the same score and mapping, is discarded.
void CData::AddToResults(double res, CPerm* perm, int wrds, bool erase)
{
	// not better than the worst stored entry: nothing to do
	if (results[NUM_RESULTS-1]>=res) {
		if (erase) delete perm;
		return;
	}

	int			i = 0, t_word;
	double		dou;
	CPerm*		t_perm;
	
	// find the first slot with a strictly lower score, rejecting duplicates
	while (i<NUM_RESULTS && results[i]>=res) {
		if (results[i] == res && perm->Equals(perm_res[i])) {
			if (erase) delete perm;
			return;
		}
		i++;
	}
	if (i == NUM_RESULTS) {
		if (erase) delete perm;
		return;
	}

	// remember the entry being displaced
	dou = results[i];
	t_perm = perm_res[i];
	t_word = word_res[i];

	results[i] = res;
	word_res[i] = wrds;
	if (erase)	perm_res[i] = perm;
	else		perm_res[i] = new CPerm(perm);

	// re-insert the displaced entry further down (handing over ownership),
	// or free it if it was in the last slot / the slot was empty
	if (i != NUM_RESULTS-1 && dou > -1e10) AddToResults(dou, t_perm, t_word, true);
	else if (t_perm) delete t_perm;	
}

// Returns the stored cipher text decoded with the i-th best permutation.
//   i - rank of the result (0 = best)
//   d - optional out-parameter receiving the score of that result
// For an out-of-range rank or an empty slot an empty string is returned
// (and *d is set to the empty-slot score) instead of dereferencing NULL.
CString CData::GetResult(int i, double* d)
{
	if (i < 0 || i >= NUM_RESULTS) {
		if (d != NULL) (*d) = -1e10;
		return CString();
	}

	if (d != NULL) (*d) = results[i];
	if (perm_res[i] == NULL) return CString();

	return perm_res[i]->GetString(code, active_l);
}

// Checks whether the `prefix` characters of `str` starting at `str_p` form
// a path in the trie rooted at `word`.
//   perm - when non-NULL, each character is first mapped through it
// Returns 1 if the whole path exists, 0 if the text ends or a trie edge is
// missing first.
int CData::FindWords(CSlovnik* word, CString* str, int str_p, int prefix, CPerm* perm)
{
	while (prefix != 0) {
		if (str_p >= str->GetLength()) return 0;

		int slot;
		if (perm) {
			slot = perm->GetIndex(str->GetAt(str_p), active_l);
		} else {
			slot = active_l.Find(str->GetAt(str_p));
		}
		ASSERT(slot >= 0);

		CSlovnik* child = word->next[slot];
		if (!child) return 0;

		// descend one level
		word = child;
		str_p++;
		prefix--;
	}
	return 1;
}

// Appends one line per result slot to `filename` (UTF-8), followed by a
// blank line. Each line holds: text length, score, number of permutations
// tried, the permuted alphabet, the count and list of changed characters,
// the letters missing from the text, the word count, the score and word
// count of the identity permutation, and the decoded text.
//   param - currently unused
void CData::OutputToFile(CString filename, int param)
{
	CStdioFileEx	output;
	// Open() must run in every build: an expression inside ASSERT is not
	// evaluated in release builds, which left the file unopened there.
	if (!output.Open(filename, CFile::modeWrite | CFile::typeText | CFile::shareDenyWrite | CFile::modeCreate | CFile::modeNoTruncate)) {
		ASSERT(FALSE);
		return;
	}
	output.SetCodePage(CP_UTF8);
	output.SeekToEnd();

	CString str, str2, str3 = _T(""), str4;
	
	// reference score: the text read with the identity permutation
	CPerm perm(active_l.GetLength());
	double dou;
	int wrds = 0;		// stays 0 if Language() bails out early
	for (int i=0;i<perm.count;i++) perm.perm.push_back(i);
	ASSERT(perm.IsIdentity());
	dou = Language(code, &perm, &wrds);

	for (int i=0; i<NUM_RESULTS; i++) {
		CString mapping;
		str.Format(_T("%d, %.0f, %d "), code.GetLength(), results[i], num_perms);
		if (perm_res[i]) {
			str2.Format(_T(" %d "), perm_res[i]->WrongChars(code, active_l, &str4));
			str2 += MissingLetters(code) + _T(" ");
			str3.Format(_T(" %d naj= %.0f %d "), word_res[i], dou, wrds);
			str3 += perm_res[i]->GetString(code, active_l);
			mapping = perm_res[i]->GetPerm(active_l).MakeLower();
		} else {
			str4.Empty();		// do not repeat the previous row's characters
			str2 = _T("<WRONG PERM> ") + MissingLetters(code) + _T(" ");
			str3.Format(_T(" %d naj= %.0f %d "), word_res[i], dou, wrds);
			str3 += _T("<WRONG CODE>");
			mapping = _T("nothing...");
		}
		output.WriteString(str + mapping + str2 + str4 + str3 + _T("\r\n"));
	}
	output.WriteString(_T("\r\n"));
	
	output.Close();
}

// Appends the contents of a numbered series of input files to `out`.
// The placeholder "???" in `in` is replaced by 020, 050, ... 500 in turn.
// Does nothing if `out` cannot be opened; input files that cannot be opened
// are skipped (previously both cases went on to use an unopened file).
void CData::ConcatFiles(CString out, CString in)
{
	CStdioFileEx input, output;
	if (!output.Open(out, CFile::modeWrite | CFile::typeText | CFile::shareDenyWrite | CFile::modeCreate | CFile::modeNoTruncate)) {
		return;
	}
	output.SetCodePage(CP_UTF8);
	output.SeekToEnd();
	
	CString str, str2, str3;
	for (int x = 20; x<=500; x+=30) {
		str2 = in;
		str3.Format(_T("%03d"), x);
		str2.Replace(_T("???"), str3);

		if (!input.Open(str2, CFile::modeRead | CFile::typeText | CFile::shareDenyRead)) {
			continue;
		}
		input.SetCodePage(CP_UTF8);

		while(input.ReadString(str)) {
			output.WriteString(str);
			output.WriteString(_T("\r\n"));
		}
		input.Close();
	}
	output.Close();
}

// Extracts `chars` cleaned characters from a random position of a text file.
//   filename - UTF-8 text file to sample from
//   chars    - number of alphabet characters wanted
// Returns the sample, which may be shorter than `chars` if the file runs
// out, or an empty string if the file cannot be opened. The alphabet must
// have been set up beforehand.
CString CData::RandomInput(CString filename, int chars)
{
	ASSERT(active_l.GetLength()); // the letters have already been loaded
	CString res, str;

	CStdioFileEx	file;
	if (!file.Open(filename, CFile::modeRead | CFile::typeText | CFile::shareDenyWrite)) {
		MessageBox(NULL, _T("Nepodarilo sa otvorit subor ") + filename, _T("Chyba!!!"), 0);
		return _T("");
	}
	file.SetCodePage(CP_UTF8);

	// Keep a margin of 5*chars at the end of the file for characters that
	// will be stripped. If the file is too small for that margin, read from
	// the start; this avoids the unsigned underflow / modulo by zero.
	ULONGLONG char_count = file.GetCharCount();
	const ULONGLONG margin = (chars > 0 ? (ULONGLONG)5 * (ULONGLONG)chars : 0);
	LONGLONG offset = 0;
	if (char_count > margin) {
		char_count -= margin;
		ULONGLONG randuint = (ULONGLONG)rand() * (ULONGLONG)rand();
		offset = (LONGLONG)(randuint % char_count);
	}

	file.Seek(offset, CFile::begin);
	while(res.GetLength() < chars) {
		// stop at end of file instead of looping forever
		if (!file.ReadString(str)) break;
		RemoveBadChars(str);
		res += str;
	}

	file.Close();
	return res.Left(chars);
}

// Returns the alphabet letters that do not occur in `str`, in alphabet order.
CString CData::MissingLetters(CString str)
{
	CString absent(_T(""));
	const int total = active_l.GetLength();
	for (int k = 0; k < total; ++k) {
		if (str.Find(active_l[k]) < 0)
			absent += active_l[k];
	}

	return absent;
}

void CData::DoScript(CString filename, bool once)
{
	CStdioFileEx	script;
	if (!script.Open(filename, CFile::modeRead | CFile::typeText | CFile::shareDenyWrite)) {
		MessageBox(NULL, _T("Nepodarilo sa otvorit subor ") + filename, _T("DAMN!!!"), 0);
		return ;
	}
	if (status) status->SetWindowText(_T("Script ") + filename + _T(" started."));
	script.SetCodePage(CP_UTF8);

	int many = 20;
	CString output = _T("out_") + filename;
	int command = 0;
	CString str, str2;
		
	// veeela opakovani
	
	for (int x = 350; x>=20; x -= 30) {
		CString iks;
		iks.Format(_T("%03d"), x);

		while(script.ReadString(str)) {
			str.Replace(_T("???"), iks);

			if (str.Left(6) == _T("SOLVE ")) {
				Solve(str.Mid(6), many);
				OutputToFile(output);
				command++;
			} else if (str.Left(9) == _T("GENERATE ")) {
				LoadFromFile(str.Mid(11), str[9] - '0');
			} else if (str.Left(7) == _T("GENDIR ")) {
				LoadFromDir(str.Mid(9), str[7] - '0');
			} else if (str.Left(7) == _T("OUTPUT ")) {
				OutputToFile(str.Mid(7));
				ClearResults();
			} else if (str.Left(8) == _T("LETTERS ")) {
				SetupLetters(str.Mid(8));
			} else if (str.Left(14) == _T("OUTPUTNOCLEAN ")) {
				OutputToFile(str.Mid(14));
			} else if (str.Left(11) == _T("SOLVECOUNT ")) {
				ClearResults();
				str = str.Mid(11);
				int i=0;
				int p=0;
				while (str[i] >= '0' && str[i] <= '9') {
					p *= 10;
					p += str[i] - '0'; 
					i++;
				}
				str = str.Mid(i+1);
				RemoveBadChars(str);
				Solve(str.Left(p), many);
				OutputToFile(output);
				//OutputToFile(_T("all.txt"));
				command++;
			} else if (str.Left(10) == _T("SETOUTPUT ")) {
				output = str.Mid(10);
				command++;
			} else if (str.Left(8) == _T("SETMANY ")) {
				str = str.Mid(8);
				int i=0;
				int p=0;
				while (str[i] >= '0' && str[i] <= '9') {
					p *= 10;
					p += str[i] - '0'; 
					i++;
				}
				many = p;
			} else if (str.Left(10) == _T("SOLVERAND ")) {	// cislo = pocet opakovani...
				ClearResults();
				str = str.Mid(10);
				int i=0;
				int p=0;
				while (str[i] >= '0' && str[i] <= '9') {
					p *= 10;
					p += str[i] - '0'; 
					i++;
				}
				str = str.Mid(i+1);
				for (int j=0; j<p; j++) {
					ClearResults();
					Solve(RandomInput(str, x), many);
					OutputToFile(output, x);
					CString all;
					all.Format(_T("all1.txt"));
					OutputToFile(all, x);
				}
			}
			str2.Format(_T("Command %d completed."), command);
			if (status) status->SetWindowText(str2);
			
		}

		script.SeekToBegin();
		if (once) break;

	}

	//for (int j=0; j<50;j++) MessageBeep(1);
	script.Close();
}

